package com.split;

import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.ExtractByUrl;
import us.codecraft.webmagic.model.annotation.TargetUrl;

/**
 * Page model whose fields are annotated with WebMagic extraction rules for pages matching
 * {@code http://jobs.zhaopin.com/*.htm} (presumably single job-posting pages; confirm against
 * the crawler configuration).
 *
 * <p>The extracted values are HTML fragments. {@link #getName()} and {@link #getDesc()} strip a
 * fixed set of markup from them before returning the text.
 *
 * <p>Created by liyan23 on 2016/9/10.
 */
@TargetUrl("http://jobs.zhaopin.com/*.htm")
public class MoveBean {

    /** Matches an opening or closing {@code h1} tag, with or without attributes. */
    private static final Pattern H1_TAG = Pattern.compile("</?h1(?=[\\s/>])[^>]*>");

    /**
     * Matches everything removed from the description, in a single pass:
     * <ul>
     *   <li>opening/closing {@code div}, {@code p}, {@code h2}, {@code a} and {@code b} tags;
     *       the lookahead after the tag name keeps {@code b} from matching {@code br} or
     *       {@code body}, and {@code [^>]*} keeps the match inside one tag so that text
     *       between tags is never swallowed;</li>
     *   <li>{@code <br>}, {@code <br/>} and {@code <br />};</li>
     *   <li>HTML comments, including ones that span several lines;</li>
     *   <li>line breaks ({@code \n} or {@code \r\n}).</li>
     * </ul>
     */
    private static final Pattern DESC_MARKUP = Pattern.compile(
            "</?(?:div|p|h2|a|b)(?=[\\s/>])[^>]*>"
                    + "|<br\\s*/?>"
                    + "|(?s:<!--.*?-->)"
                    + "|\\r?\\n");

    /** Raw {@code h1} fragment selected by the XPath rule below. */
    @ExtractBy("/html/body/div[5]/div[1]/div[1]/h1")
    private String name;

    // Earlier XPath-based rule, superseded by the CSS selector below:
    //@ExtractBy("/html/body/div[6]/div[1]/div[1]/div/div[1]")
    /** Raw HTML fragment selected by the {@code div.tab-inner-cont} CSS rule. */
    @ExtractBy(value = "div.tab-inner-cont", type = ExtractBy.Type.Css)
    private String desc;

    /** Value extracted from the page URL by the pattern below. */
    @ExtractByUrl("http://jobs.zhaopin.com/\\d+.htm")
    private String url;

    /**
     * Returns the name with any {@code h1} tags removed.
     *
     * <p>When the stored value is not blank, the cleaned text is written back to the field, so
     * the raw fragment is no longer available after the first call.
     *
     * @return the cleaned name, or the stored value unchanged if it is {@code null} or blank
     */
    public String getName() {
        if (StringUtils.isNotBlank(name)) {
            name = H1_TAG.matcher(name).replaceAll("");
        }
        return name;
    }

    /**
     * Stores the name as given; no cleaning happens here.
     *
     * @param name the raw name value, may be {@code null}
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * Returns the description with the markup described on {@link #DESC_MARKUP} removed.
     *
     * <p>Only the listed tags, comments and line breaks are stripped; other markup and all text
     * between tags are preserved. When the stored value is not blank, the cleaned text is written
     * back to the field.
     *
     * @return the cleaned description, or the stored value unchanged if it is {@code null} or blank
     */
    public String getDesc() {
        if (StringUtils.isNotBlank(desc)) {
            desc = DESC_MARKUP.matcher(desc).replaceAll("");
        }
        return desc;
    }

    /**
     * Stores the description as given; no cleaning happens here.
     *
     * @param desc the raw description value, may be {@code null}
     */
    public void setDesc(String desc) {
        this.desc = desc;
    }

    /**
     * Returns the stored URL value unchanged.
     *
     * @return the URL value, may be {@code null}
     */
    public String getUrl() {
        return url;
    }

    /**
     * Stores the URL value as given.
     *
     * @param url the URL value, may be {@code null}
     */
    public void setUrl(String url) {
        this.url = url;
    }
}
